suppressPackageStartupMessages(library(tidyverse))
suppressPackageStartupMessages(library(readxl))
suppressPackageStartupMessages(library(hablar))
suppressPackageStartupMessages(library(tsibble))
suppressPackageStartupMessages(library(dplyr))
suppressPackageStartupMessages(library(DT))
suppressPackageStartupMessages(library(ggplot2))
suppressPackageStartupMessages(library(ggbiplot))
The module2 of the survey on the perception of risk and benefit from energy technologies in India.
Following is the complete data set with 1099 responses.
moduletwo <- read_excel( "~/Desktop/thesis_surveydata/political_factors_complete.xlsx")
save(moduletwo, file = "moduletwo.rda")
module2 <- data.frame(moduletwo)
module2 %>%
filter(!row_number() %in% c(1,2))
# loaded data set for module 2 eco -pol variables and removes the rows with question number and question text
Following graphs show the distribution of percerived risk and benefit on the likert scale
#plotting perceived risk from different technologies
module2%>%
filter(!row_number() %in% c(1,2)) %>%
select(starts_with("RISKY"))%>%
gather(Technology, Perceived_Risk, Risky_Hydro:Risky_INDLPG, factor_key = TRUE)%>%
separate(Technology, c("Risky", "Technology"), extra = "merge", fill = "left")%>%
select(-Risky) %>%
group_by(Technology, Perceived_Risk) %>%
dplyr::summarise(n=n())
## `summarise()` has grouped output by 'Technology'. You can override using the
## `.groups` argument.
colorsequence<- c("#8C4646","#D9695F","#F2A679","#F2D091","#5D8C7B","#808080")
df_barplot<-module2%>%
filter(!row_number() %in% c(1,2)) %>%
select(starts_with("RISKY"))%>%
gather(Technology, Perceived_Risk, Risky_Hydro:Risky_INDLPG, factor_key = TRUE)%>%
separate(Technology, c("Risky", "Technology"), extra = "merge", fill = "left")%>%
select(-Risky) %>%
group_by(Technology, Perceived_Risk) %>%
dplyr::summarise(n=n())
## `summarise()` has grouped output by 'Technology'. You can override using the
## `.groups` argument.
df_barplot %>% ggplot(aes(fill= fct_rev(factor(Perceived_Risk, levels=c("Extremely risky", "Very risky","Moderately risky","Slightly risky","Not at all risky","Rather not say/Don't know"))), y=n , x=Technology)) +
geom_bar(position="fill",stat="identity")+
scale_x_discrete(limits = c("Solar", "INDSolar", "INDFirewoodetc" ,"Wind" , "Hydro", "INDWind", "INDHydro", "INDDiesel" , "INDBiogas","INDKerosene", "Oil", "Coal","Gas", "INDLPG","Nuclear"))+
scale_fill_manual("legend", values = c("Extremely risky" = "#8C4646", "Very risky" = "#D9695F", "Moderately risky" = "#F2A679", "Slightly risky" = "#F2D091","Not at all risky" = "#5D8C7B", "Rather not say/Don't know" = "#808080"))+
coord_flip()+
theme_classic()
colorsequence<- c("#8C4646","#D9695F","#F2A679","#F2D091","#5D8C7B","#808080")
df_barplotben <- module2%>%
filter(!row_number() %in% c(1,2)) %>%
select(starts_with("Ben"))%>%
gather(Technology, Perceived_Benefit, Ben_Hydro:Ben_INDKerosene, factor_key = TRUE)%>%
separate(Technology, c("Ben", "Technology"), extra = "merge", fill = "left")%>%
select(-Ben) %>%
group_by(Technology, Perceived_Benefit) %>%
dplyr::summarise(n=n())
## `summarise()` has grouped output by 'Technology'. You can override using the
## `.groups` argument.
df_barplotben %>% ggplot(aes(fill= factor(Perceived_Benefit, levels=c("Rather not say/Don't know","Not at all beneficial","Slightly beneficial","Moderately beneficial","Very beneficial", "Extremely beneficial")), y=n , x=Technology)) +
geom_bar(position="fill",stat="identity")+
scale_fill_manual("legend", values = c("Rather not say/Don't know" = "grey94" ,"Not at all beneficial" = "orange","Slightly beneficial"="yellow" ,"Moderately beneficial" = "palegreen","Very beneficial"= "green3", "Extremely beneficial"= "green4"))+
theme_classic()+
coord_flip()
# fix Rather not say issue.
Now I am converting the likert scale for various scales used in the survey to numeric values from 1 to 5, Rather Not Say/Don’t know is printed as NA. Some variables had to be reverse coded as well.
#This chunk contains the coding schemes of various scales used in survey one: eco-political factors, kahan scale and acceptance scale
codedmodule2 <- module2 %>%
#remove row 1
filter(!row_number() %in% c(1,2)) %>%
# replace risky likert scale with numbers
mutate_at(vars(starts_with("Risky")), funs(case_when(. =="Not at all risky" ~ 1,
. =="Slightly risky" ~ 2,
. =="Moderately risky" ~ 3,
. =="Very risky" ~ 4,
. =="Extremely risky" ~ 5))) %>%
# replace beneficial likert scale with numbers
mutate_at(vars(starts_with("Ben")), funs(case_when(. =="Not at all beneficial" ~ 1,
. =="Slightly beneficial" ~ 2,
. =="Moderately beneficial" ~ 3,
. =="Very beneficial" ~ 4,
. =="Extremely beneficial" ~ 5 ))) %>%
# replace nuclear acceptance likert scale with numbers
mutate_at(vars(N_accept,N_reluctantlyaccept,N_reject), funs(case_when(. == "Strongly disagree" ~ 1,
. == "Somewhat disagree" ~ 2,
. == "Neither agree nor disagree" ~3,
. == "Somewhat agree" ~ 4,
. == "Strongly agree" ~ 5))) %>%
# code likert scale for variables for Kahan scale into numbers
mutate_at(vars(starts_with (c("K_I","K_H","DISPLACE", "POLLUTE", "HEALTH", "JOBS", "BEAUTY", "PRIDE", "NPRIDE", "DEV", "PROSPER", "RELY"))), funs(case_when(. == "Strongly disagree" ~ 1,
. == "Somewhat disagree" ~ 2,
. == "Neither agree nor disagree" ~3,
. == "Somewhat agree" ~ 4,
. == "Strongly agree" ~ 5))) %>%
# reverse code for likert scale for variables for Kahan scale into numbers
mutate_at(vars(starts_with (c("K_S","K_E"))), funs(case_when(. == "Strongly disagree" ~ 5,
. == "Somewhat disagree" ~ 4,
. == "Neither agree nor disagree" ~3,
. == "Somewhat agree" ~ 2,
. == "Strongly agree" ~ 1))) %>%
# code eco-pol scale variables into numbers
mutate_at(vars(SYSTEMDEMO,SYSTEMRELIGION,SYSTEMTECHNO,SYSTEMTOTAL,WEALTHLIM,MECHANISATION,DECISIONDECEN,INDUSTRYLARGE,ECONOMYGLOBAL,OWNERPVT,OWNERNOREG), funs(case_when(. == "Strongly disagree" ~ 1,
. == "Somewhat disagree" ~ 2,
. == "Neither agree nor disagree" ~3,
. == "Somewhat agree" ~ 4,
. == "Strongly agree" ~ 5))) %>%
# reverse code eco-pol scale variables into numbers
mutate_at(vars(DECISIONCEN, INDUSTRYSMALL, ECONOMYLOCAL, ENVOVERDEV,OWNERPUB, OWNERREG), funs(case_when(. == "Strongly disagree" ~ 5,
. == "Somewhat disagree" ~ 4,
. == "Neither agree nor disagree" ~3,
. == "Somewhat agree" ~ 2,
. == "Strongly agree" ~ 1)))
## Warning: `funs()` was deprecated in dplyr 0.8.0.
## Please use a list of either functions or lambdas:
##
## # Simple named list:
## list(mean = mean, median = median)
##
## # Auto named with `tibble::lst()`:
## tibble::lst(mean, median)
##
## # Using lambdas
## list(~ mean(., trim = .2), ~ median(., na.rm = TRUE))
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was generated.
codedmodule2
Following table shows the summary statistics of perceived risk from different energy technologies.
# summary statistics for risks from different energy sources
risksummary <- codedmodule2 %>%
summarize_at(vars(starts_with(c("RISKY"))), list(~mean(., na.rm = TRUE), ~median(., na.rm = TRUE), ~sd(.,na.rm = TRUE),~var(.,na.rm = TRUE), ~n())) %>%
#add row index so later spreading indexed correctly
add_rownames()%>%
#melt to long format
gather(technology, value, -rowname) %>%
# separate risky from variable suffix
separate(technology, c("Risky", "var"), extra = "merge", fill = "left") %>%
#separate mean from variable prefix
separate(var, c("technology", "summary")) %>%
# spread summary values back to wide form
spread(summary,value) %>%
#clean up
select(-rowname, -Risky) %>%
arrange(mean) %>%
setNames(paste0('perceivedrisk.', names(.))) %>%
dplyr::rename(technology= perceivedrisk.technology)
## Warning: `add_rownames()` was deprecated in dplyr 1.0.0.
## Please use `tibble::rownames_to_column()` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was generated.
risksummary
Following table shows the summary statistics of perceived benefit from different energy technologies.
# summary statistics for benefits from different energy sources
benefitsummary <- codedmodule2 %>%
summarize_at(vars(starts_with(c("Ben"))), list(~mean(., na.rm = TRUE), ~median(., na.rm = TRUE), ~sd(.,na.rm = TRUE),~var(.,na.rm = TRUE), ~n())) %>%
#add row index so later spreading indexed correctly
add_rownames()%>%
#melt to long format
gather(technology, value, -rowname) %>%
# separate Ben from variable suffix
separate(technology, c("Ben", "var"), extra = "merge", fill = "left") %>%
#separate mean from variable prefix
separate(var, c("technology", "summary")) %>%
# spread summary values back to wide form
spread(summary,value) %>%
#clean up
select(-rowname, -Ben) %>%
arrange(mean)%>%
setNames(paste0('perceivedbenefit.', names(.))) %>%
dplyr::rename(technology = perceivedbenefit.technology)
benefitsummary
#round off to two decimal places
bar plots for means of perceived risk and benefit from various energy technologies
full_join(risksummary,benefitsummary, by = NULL, copy = FALSE, keep = FALSE) %>%
select(technology, perceivedrisk.mean, perceivedbenefit.mean) %>%
mutate(technology = fct_reorder(technology, perceivedrisk.mean, .desc = TRUE))%>%
gather(key = "level", value = "mean",
perceivedrisk.mean, perceivedbenefit.mean) %>%
ggplot(aes(x=technology, y= mean, fill = level, ))+
geom_bar(stat="identity", position=position_dodge())+
ylim(0,4)+
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))+
scale_fill_manual("legend", values = c("perceivedbenefit.mean" = "palegreen3", "perceivedrisk.mean" = "firebrick2"))
## Joining, by = "technology"
bar plots for median of perceived risk and benefit from various energy technologies
full_join(risksummary,benefitsummary, by = NULL, copy = FALSE, keep = FALSE) %>%
select(technology, perceivedrisk.median, perceivedbenefit.median) %>%
mutate(technology = fct_reorder(technology, perceivedrisk.median, .desc = TRUE))%>%
gather(key = "level", value = "median",
perceivedrisk.median, perceivedbenefit.median) %>%
ggplot(aes(x=technology, y= median, fill = level, ))+
geom_bar(stat="identity", position=position_dodge())+
ylim(0,4)+
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))+
scale_fill_manual("legend", values = c("perceivedbenefit.median" = "palegreen3", "perceivedrisk.median" = "firebrick2"))
## Joining, by = "technology"
PCA on all indepdendent variables in the data set
#PCA on all variables
pcaresults <- codedmodule2 %>%
select(!starts_with(c("Risky","BEN", "Gender","Caste","Religion","State", "Age", "Otherreligion", "Urban_Rural","Language", "Survey_Date", "N_accept", "N_reject", "N_reluctantlyaccept")))%>%
na.omit()%>%
prcomp(center = TRUE, scale. = TRUE)
summary(pcaresults)
## Importance of components:
## PC1 PC2 PC3 PC4 PC5 PC6 PC7
## Standard deviation 4.0781 2.58722 2.39651 1.78412 1.58443 1.47616 1.42466
## Proportion of Variance 0.1848 0.07437 0.06381 0.03537 0.02789 0.02421 0.02255
## Cumulative Proportion 0.1848 0.25916 0.32298 0.35834 0.38624 0.41045 0.43300
## PC8 PC9 PC10 PC11 PC12 PC13 PC14
## Standard deviation 1.38650 1.36336 1.30830 1.24685 1.21705 1.18305 1.13655
## Proportion of Variance 0.02136 0.02065 0.01902 0.01727 0.01646 0.01555 0.01435
## Cumulative Proportion 0.45436 0.47501 0.49403 0.51131 0.52776 0.54331 0.55767
## PC15 PC16 PC17 PC18 PC19 PC20 PC21
## Standard deviation 1.12046 1.10196 1.08903 1.07521 1.07036 1.04993 1.02694
## Proportion of Variance 0.01395 0.01349 0.01318 0.01285 0.01273 0.01225 0.01172
## Cumulative Proportion 0.57162 0.58511 0.59829 0.61113 0.62386 0.63611 0.64783
## PC22 PC23 PC24 PC25 PC26 PC27 PC28
## Standard deviation 1.0084 0.98229 0.98054 0.9674 0.95674 0.93786 0.93124
## Proportion of Variance 0.0113 0.01072 0.01068 0.0104 0.01017 0.00977 0.00964
## Cumulative Proportion 0.6591 0.66985 0.68053 0.6909 0.70110 0.71087 0.72051
## PC29 PC30 PC31 PC32 PC33 PC34 PC35
## Standard deviation 0.91945 0.91874 0.90892 0.88642 0.87280 0.85548 0.85175
## Proportion of Variance 0.00939 0.00938 0.00918 0.00873 0.00846 0.00813 0.00806
## Cumulative Proportion 0.72990 0.73928 0.74846 0.75719 0.76565 0.77379 0.78185
## PC36 PC37 PC38 PC39 PC40 PC41 PC42
## Standard deviation 0.83638 0.82895 0.81339 0.80049 0.79529 0.78025 0.76635
## Proportion of Variance 0.00777 0.00764 0.00735 0.00712 0.00703 0.00676 0.00653
## Cumulative Proportion 0.78962 0.79725 0.80461 0.81173 0.81875 0.82552 0.83204
## PC43 PC44 PC45 PC46 PC47 PC48 PC49
## Standard deviation 0.75496 0.74127 0.73353 0.71927 0.70928 0.70185 0.69952
## Proportion of Variance 0.00633 0.00611 0.00598 0.00575 0.00559 0.00547 0.00544
## Cumulative Proportion 0.83838 0.84448 0.85046 0.85621 0.86180 0.86727 0.87271
## PC50 PC51 PC52 PC53 PC54 PC55 PC56
## Standard deviation 0.69173 0.68714 0.66536 0.65994 0.65445 0.64642 0.63999
## Proportion of Variance 0.00532 0.00525 0.00492 0.00484 0.00476 0.00464 0.00455
## Cumulative Proportion 0.87802 0.88327 0.88819 0.89303 0.89779 0.90243 0.90698
## PC57 PC58 PC59 PC60 PC61 PC62 PC63
## Standard deviation 0.63124 0.61434 0.60588 0.5997 0.58925 0.5847 0.57783
## Proportion of Variance 0.00443 0.00419 0.00408 0.0040 0.00386 0.0038 0.00371
## Cumulative Proportion 0.91141 0.91560 0.91968 0.9237 0.92753 0.9313 0.93504
## PC64 PC65 PC66 PC67 PC68 PC69 PC70
## Standard deviation 0.56166 0.56020 0.54329 0.53562 0.53535 0.53240 0.52502
## Proportion of Variance 0.00351 0.00349 0.00328 0.00319 0.00318 0.00315 0.00306
## Cumulative Proportion 0.93855 0.94204 0.94532 0.94850 0.95169 0.95484 0.95790
## PC71 PC72 PC73 PC74 PC75 PC76 PC77
## Standard deviation 0.51368 0.50376 0.49616 0.4927 0.47740 0.47053 0.46332
## Proportion of Variance 0.00293 0.00282 0.00274 0.0027 0.00253 0.00246 0.00239
## Cumulative Proportion 0.96083 0.96365 0.96639 0.9691 0.97162 0.97408 0.97646
## PC78 PC79 PC80 PC81 PC82 PC83 PC84
## Standard deviation 0.45840 0.45376 0.44332 0.42916 0.41769 0.41000 0.40037
## Proportion of Variance 0.00233 0.00229 0.00218 0.00205 0.00194 0.00187 0.00178
## Cumulative Proportion 0.97880 0.98108 0.98327 0.98531 0.98725 0.98912 0.99090
## PC85 PC86 PC87 PC88 PC89 PC90
## Standard deviation 0.39496 0.39030 0.36983 0.36479 0.35288 0.34095
## Proportion of Variance 0.00173 0.00169 0.00152 0.00148 0.00138 0.00129
## Cumulative Proportion 0.99263 0.99433 0.99585 0.99732 0.99871 1.00000
var_explained = pcaresults$sdev^2 / sum(pcaresults$sdev^2)
Scree plot of results from PCA
qplot(c(1:90), var_explained) +
geom_line() +
xlab("Principal Component") +
ylab("Variance Explained") +
ggtitle("Scree Plot") +
ylim(0, 0.20)+
xlim(1,7)
## Warning: Removed 83 rows containing missing values (geom_point).
## Warning: Removed 83 row(s) containing missing values (geom_path).
Coding and Combining items on Kahan scale
#median score for Kahan invidualism scale
codedmodule2 %>%
select((starts_with(c("K_I", "K_S"))))%>%
gather(key = "K_I", value = "score")%>%
na.omit()%>%
summarise(median = median(c(score)))
# Kahan scale median score for Hierarchy scale
codedmodule2 %>%
select((starts_with(c("K_H", "K_E"))))%>%
gather(key = "K_H", value = "score")%>%
na.omit()%>%
summarise(median = median(c(score)))
#calculating individualism score and hierarchy score for each respondent
Kscalescores <- codedmodule2 %>%
select(starts_with(c("Risky","K_I","K_S", "K_H", "K_E")))%>%
rowwise()%>%
na.omit()%>%
dplyr::mutate(Individualism_score = mean(c_across(K_IINTRFER:K_SPROTECT)))%>%
dplyr::mutate(Hierarchy_score = mean(c_across(K_HEQUAL:K_ERADEQ2)))%>%
dplyr::select(!starts_with(c("K_I","K_S", "K_H", "K_E")))
Kscalescores
#scatter plot of Kahan scale scores around the median scores on Individualism and Hierarchy scales.
Kscalescores %>%
ggplot(aes(Individualism_score, Hierarchy_score))+
geom_point()+
geom_hline(yintercept=2, colour="black", lwd=1)+
geom_vline(xintercept=3, colour="black", lwd=1)
# checking distribution of scores on Individualism scale
Kscalescores %>%
ggplot(aes(x=Individualism_score, y = ..count..), fill = "lightgray")+
geom_density()
# checking distribution of scores on Hierarchy scale
Kscalescores %>%
ggplot(aes(x=Hierarchy_score, y = ..count..), fill = "lightgray")+
geom_density()
Coding and combining items on the eco-political scale - characteristics of the perceiver
ecopolscalescores <- codedmodule2 %>%
select(starts_with(c("Risky","SYSTEMDEMO","SYSTEMRELIGION","SYSTEMTECHNO","SYSTEMTOTAL","WEALTHLIM","MECHANISATION","DECISIONDECEN","DECISIONCEN","INDUSTRYLARGE","INDUSTRYSMALL","ECONOMYGLOBAL","ECONOMYLOCAL", "ENVOVERDEV","DEVOVERENV","OWNERPVT","OWNERNOREG","OWNERPUB", "OWNERREG", "Gender","Caste","Religion", "State")))%>%
rowwise()%>%
na.omit()%>%
dplyr::mutate(Pro_decentralisation = mean(c_across("DECISIONDECEN":"DECISIONCEN")))%>%
dplyr::mutate(Pro_largescale = mean(c_across("INDUSTRYLARGE":"INDUSTRYSMALL")))%>%
dplyr::mutate(Pro_global = mean(c_across("ECONOMYGLOBAL":"ECONOMYLOCAL")))%>%
dplyr::mutate(Pro_ecogrowth = mean(c_across("ENVOVERDEV":"DEVOVERENV")))%>%
dplyr::mutate(Pro_private = mean(c_across("OWNERPVT":"OWNERREG")))%>%
dplyr::select(!c("DECISIONDECEN","DECISIONCEN", "INDUSTRYLARGE","INDUSTRYSMALL","ECONOMYGLOBAL", "ECONOMYLOCAL","ENVOVERDEV", "DEVOVERENV","OWNERPVT","OWNERNOREG","OWNERPUB", "OWNERREG"))
ecopolscalescores
PCA test on the new eco-pol scale - characteristics of the perceiver
# PCA test on qualities of the perceiver
ecopolscalepca <- ecopolscalescores %>%
select(!starts_with(c("Risky","Gender","Caste","Religion","State")))%>%
prcomp(center = TRUE,scale. = TRUE)
var_explained_ecopol = ecopolscalepca$sdev^2 / sum(ecopolscalepca$sdev^2)
summary(ecopolscalepca)
## Importance of components:
## PC1 PC2 PC3 PC4 PC5 PC6 PC7
## Standard deviation 1.4705 1.2373 1.1700 1.0027 0.96948 0.90729 0.86862
## Proportion of Variance 0.1966 0.1392 0.1245 0.0914 0.08545 0.07483 0.06859
## Cumulative Proportion 0.1966 0.3358 0.4602 0.5516 0.63706 0.71189 0.78048
## PC8 PC9 PC10 PC11
## Standard deviation 0.83091 0.80331 0.78471 0.68059
## Proportion of Variance 0.06276 0.05866 0.05598 0.04211
## Cumulative Proportion 0.84325 0.90191 0.95789 1.00000
qplot(c(1:11), var_explained_ecopol) +
geom_line() +
xlab("Principal Component") +
ylab("Variance Explained") +
ggtitle("Scree Plot") +
ylim(0.05, 0.20)+
xlim(1,7)
## Warning: Removed 4 rows containing missing values (geom_point).
## Warning: Removed 4 row(s) containing missing values (geom_path).